clear

** Creating paths and importing data
* The output folder is defined once; the data/path globals and the working
* directory all point at that same folder.
global output "W:\Research\Current research\China\Analyses\Output"
global data "$output"
global path "$output"
cd "$output"

* Load the workbook; the first row holds the variable names
quietly import excel using "W:\Research\Current research\China\Analyses\newdataset.xlsx", firstrow


** Encode string categoricals to labelled numeric variables so they can be
** used with factor-variable (i.) notation
quietly {
    encode IndustryCode,            gen(Industrycode)
    encode C_Education,             gen(C_educ)
    encode L_Education,             gen(L_educ)
    encode Companyregistrationtype, gen(Companyregtype)
}

** Create new variables
quietly {
    gen Firmage        = year - Companyestablishmentyear
    gen NetProfitRatio = NetProfit/Sales
    gen logTotalAsset  = log(TotalAsset)

    * Current ratio; when current liabilities are exactly 0 the ratio is
    * undefined, so current assets themselves are used instead
    gen CurrentRatio = cond(Currentliabilities==0, Currentassets, Currentassets / Currentliabilities)

    * Sales growth:
    *   - default: sqrt(Sales/Sales_2) - 1
    *   - Sales_2 missing or 0: fall back to Sales/Sales_1 - 1
    *   - Sales_2 missing or 0 and Sales_1 == 0: placeholder value 1000
    gen SalesGrowth = cond(Sales_2==. | Sales_2==0, cond(Sales_1==0, 1000, Sales / Sales_1 - 1), sqrt(Sales/Sales_2) - 1)
}

* Maturity
* Long      : 1 if the contract length exceeds 4e+10, 0 otherwise
* Maturity2 : 1 = not long, 2 = long
* Both are missing when Outcome==0 or when the contract length is unknown.
* NOTE(review): the 4e+10 threshold presumably assumes the dates are stored as
* millisecond datetimes (about 463 days) - confirm against the imported data.
quietly {
    gen Maturity = Contractexpirationdate - ContractStartDate

    * Stata treats missing as larger than any number, so an unguarded
    * "Maturity > 4e+10" would classify loans with missing dates as long.
    gen Long = Maturity > 4e+10 if !missing(Maturity)
    replace Long = . if Outcome==0

    gen Maturity2 = 1
    replace Maturity2 = 2 if Long==1
    replace Maturity2 = . if Outcome==0 | missing(Long)
}

* Collateral
* A missing collateral amount is treated as no collateral (0).
* The dummy is defined only when Outcome == 1: 1 if the amount is positive,
* 0 if it is exactly zero; it stays missing in every other case.
quietly {
    replace Collateral_amount = 0 if missing(Collateral_amount)
    gen Collateral = (Collateral_amount > 0) if Outcome == 1 & Collateral_amount >= 0
}

* Bank personnel characteristics: level of education
* 1 + Bachelor dummy + 2 * Master dummy, built for the C_ and L_ prefixed
* variables in turn
foreach who in C L {
    quietly gen `who'_Edulevel = 1 + `who'_Bachelor + 2 * `who'_Master
}

* Bank personnel characteristics: dummy for high biospheric values (over median)
* The cut-offs (5.75 for L_, 5 for C_) are hard-coded.
* The dummies are left missing when the underlying score is missing: Stata
* treats missing as larger than any number, so an unguarded "> cutoff" test
* would flag respondents with no score as high-biospheric.
quietly {
    gen L_HighBio = L_Biosphericvalues > 5.75 if !missing(L_Biosphericvalues)
    gen C_HighBio = C_Biosphericvalues > 5    if !missing(C_Biosphericvalues)

    * Use level 2 as the base category of the encoded education variables
    fvset base 2 L_educ
    fvset base 2 C_educ
}

* Renaming and labelling variables
* Shorten the two long variable names first, then attach the display labels.
rename Recommendationlevel Recommend
rename Environmentalrisk Envirorisk

label variable Recommend "Recommend"
label variable TruthfulnessandReasonableness "Truthful, reasonable"
label variable RepaymentWillingness "Willingness to repay"
label variable Repaymentability "Ability to repay"

* Winsorizing
* Each ratio is winsorized at the 5th and 95th percentiles; the results are the
* *_w variables referenced in the control-variable list.
foreach ratio in CurrentRatio DebtAssetRatio NetProfitRatio SalesGrowth {
    quietly winsor2 `ratio', cuts(5 95)
}

* Variable group: firm-characteristic controls (winsorized ratios plus
* registration-type and industry dummies)
global lpm_controls "Firmage logTotalAsset CurrentRatio_w DebtAssetRatio_w NetProfitRatio_w SalesGrowth_w i.Companyregtype i.Industrycode"

* Variable group: old soft variables (assessments of the customer manager,
* excluding the recommendation level and the new categories)
global oldsofts "TruthfulnessandReasonableness Repaymentability RepaymentWillingness"

* Some new variables
* Number of observations per C_ID and per L_ID
foreach who in C L {
    bysort `who'_ID: gen `who'_ncases = _N
}

* Five equal-frequency groups of the overall biospheric-values score
foreach who in L C {
    quietly egen `who'_BioQ = cut(`who'_Biosphericvalues), group(5)
}

* Five equal-frequency groups of each individual C_ biospheric item
quietly {
    egen C_BioResQ = cut(C_Respectingtheearth),       group(5)
    egen C_BioUniQ = cut(C_Unitywithnature),          group(5)
    egen C_BioProQ = cut(C_Protectingtheenvironment), group(5)
    egen C_BioPreQ = cut(C_Preventingpollution),      group(5)
}
   
quietly {
    * Centering environmental risk score on its full-sample mean
    egen Envirorisk_mean = mean(Envirorisk)
    gen Envirorisk_center = Envirorisk - Envirorisk_mean

    * Predicted value and residual of the customer managers' recommendation
    * decision (linear model, standard errors clustered on C_ID)
    regress Recommend $lpm_controls $oldsofts Safetyrisk Envirorisk, vce(cluster C_ID)
    predict Predicted_Rec
    gen Resid_Rec = Recommend - Predicted_Rec

    * Dividing the predicted value into deciles (groups coded 0-9)
    egen C_PredD = cut(Predicted_Rec), group(10)

    * Indicator for the middle-3 decile group (codes 3, 4 and 5)
    gen C_HighPredGroup = (C_PredD > 2 & C_PredD < 6)

    * Environmental risk score centered within the bottom-3 deciles;
    * missing outside that subsample
    egen Envirorisk_mean_dec02 = mean(Envirorisk) if C_PredD < 3
    gen Envirorisk_center_dec02 = Envirorisk - Envirorisk_mean_dec02

    * Environmental risk score centered within the middle-3 deciles;
    * missing outside that subsample
    egen Envirorisk_mean_dec35 = mean(Envirorisk) if (C_PredD > 2 & C_PredD < 6)
    gen Envirorisk_center_dec35 = Envirorisk - Envirorisk_mean_dec35

    * Environmental risk score centered within the bottom-3 and middle-3
    * deciles combined; missing outside that subsample
    egen Envirorisk_mean_dec05 = mean(Envirorisk) if (C_PredD < 6)
    gen Envirorisk_center_dec05 = Envirorisk - Envirorisk_mean_dec05
}

** Environmental variables from new data, built identically for the
** customer manager (C_) and the loan officer (L_)
foreach who in C L {
    quietly {
        * Item averages; items entered as (6 - item) are reversed before
        * averaging (presumably 1-5 scales - confirm against the survey)
        gen `who'_Envinfo = (`who'_AmongouremployeesImoneo + `who'_Iknowprettymuchaboutenvir + (6 - `who'_Comparedtomostotherpeople) + `who'_Itiseasyformetoevaluate) / 4
        gen `who'_Envbeliefs = (`who'_Financinggreenfirmsisgoo + `who'_Ifafirmbecomesmoreenviron) / 2
        gen `who'_Envvalues = (`who'_Ichoosetheenvironmentallyf + (6 - `who'_Firmsshouldalwaysputprofit)) / 2

        * Five equal-frequency groups of each score
        egen `who'_EnvinfoQ = cut(`who'_Envinfo), group(5)
        egen `who'_EnvbeliefsQ = cut(`who'_Envbeliefs), group(5)
        egen `who'_EnvvaluesQ = cut(`who'_Envvalues), group(5)

        * Two-group split of the information score
        egen `who'_EnvinfoH = cut(`who'_Envinfo), group(2)
    }
}


* Defining some other variables
* NOTE(review): var() and corr() are not official egen functions; presumably
* they come from the egenmore package (SSC) - confirm it is installed.
quietly {
    * Within-customer-manager variance of the environmental risk score
    egen C_Envrisk_variance = var(Envirorisk), by(C_ID)

    * Within-customer-manager correlation of the environmental risk score
    * with each of the other assessments
    bysort C_ID : egen C_Cor_env_safety           = corr(Envirorisk Safetyrisk)
    bysort C_ID : egen C_Cor_env_recommend        = corr(Envirorisk Recommend)
    bysort C_ID : egen C_Cor_env_truthful         = corr(Envirorisk TruthfulnessandReasonableness)
    bysort C_ID : egen C_Cor_env_repaymentability = corr(Envirorisk Repaymentability)
    bysort C_ID : egen C_Cor_env_willingness      = corr(Envirorisk RepaymentWillingness)

    * Within-loan-officer correlation of the outcome with the environmental
    * risk score
    bysort L_ID : egen L_Cor_outcome_env = corr(Outcome Envirorisk)
}


* Divide industries into 3 groups based on their greenness, as measured by
* their mean environmental score in Table A1 Panel C.
* Any industry not listed below keeps the initial score of 0.
gen Ind_env_score = 0

* 1 = brownest industries
replace Ind_env_score = 1 if inlist(IndustryCode, "Construction", "Transport, warehouse, postal")

* 2 = middle industries in terms of greenness
replace Ind_env_score = 2 if inlist(IndustryCode, "Manufacturing", "Other", "Accommodation and catering", "Residents and repair serv.")

* 3 = greenest industries
replace Ind_env_score = 3 if inlist(IndustryCode, "Wholesale and retail", "Leasing and business serv.", "Agriculture", "IT")


*****
* Create Female owner dummy: 1 when ActualControllergender equals 2, else 0
* (observations with a missing gender code are therefore coded 0)
quietly gen FemaleOwner = (ActualControllergender == 2)

* Interactions of the owner dummy with the L_ and C_ Female dummies
foreach who in L C {
    quietly gen `who'_FemaleInteraction = `who'_Female * FemaleOwner
}


//Programs

//RUN LPM to predict loan granting dummy
// Usage: predict_grant_probs          (takes no arguments)
// Reads : globals $lpm_controls and $oldsofts, plus the variables Outcome,
//         Recommend, Safetyrisk, Envirorisk and L_ID in the data in memory
// Writes: p_approve_hat, the fitted value for the estimation sample
//         (missing for observations outside it)
capture program drop predict_grant_probs
program define predict_grant_probs
    * Remove the result of any earlier call; otherwise predict would abort
    * because p_approve_hat already exists
    capture drop p_approve_hat

    * Run LPM to predict granting probabilities (replicate T7 specification 2)
    regress Outcome $lpm_controls Recommend $oldsofts Safetyrisk Envirorisk, vce(cluster L_ID)
    predict p_approve_hat if e(sample)
end



// Global macro holding the name of the debt-to-asset variable to use;
// here it points at the raw DebtAssetRatio variable
global DebtAssetRatio "DebtAssetRatio"



